Here we have an initial foray into plotting some of the covariate values, such as RNA-binding protein associations, with respect to the readout of partition coefficients. Since there are three partitions that are compositional, we can use ternary plots to represent the readout space.
library(magrittr)
library(tidyverse)
library(ggtern)
library(ggridges)
library(gtsummary)
library(readxl)
library(cowplot)
# Fix the RNG seed so any randomized steps are reproducible.
set.seed(20220602)

# Input: serialized data frame, loaded below as `df_tiger`.
RDS_IN <- "data/df_tiger_clean.Rds"

# Colors keyed by category code; used for fills in the ridge and histogram plots.
LOC_COLORS <- c(
  "ER" = "#4B74A6",
  "TG" = "#E4E4CA",
  "CY" = "#949494",
  "DF" = "#CCCCCC"
)

# Same palette except for a darker TG entry; used for point colors in the
# ternary plots.
LOC_COLORS_DARK <- c(
  "ER" = "#4B74A6",
  "TG" = "#C9C497",
  "CY" = "#949494",
  "DF" = "#CCCCCC"
)

# NOTE(review): presumably thresholds on the normalized partition coefficients
# used for categorization; they are not referenced in the code visible here.
MIN_PCON_CY <- 1.15
MIN_PCON_ER <- 1.25
MIN_PCON_TG <- 1.30
MIN_PCON_DIFF <- 0.15

df_tiger <- readRDS(RDS_IN)
First, validate that the partition coefficients for each gene sum to 1.
# Per-row sum of the three partition coefficients. For compositional data
# every sum should equal 1 up to floating-point error.
df_pcos <- df_tiger %>%
  mutate(pco_sum = pco_tg + pco_er + pco_cy) %>%
  select(gene_name, refseq_id, category, pco_sum, pco_tg, pco_er, pco_cy)

# Grey histogram: all sums. Blue overlay: only the sums numerically equal to 1.
# all.equal() returns a character description (not FALSE) when values differ,
# so it is wrapped in isTRUE() to always yield a logical; without the wrapper
# map_lgl() errors on the first sum that is not within tolerance of 1.
ggplot(df_pcos, aes(x = pco_sum)) +
  geom_histogram(fill = "lightgrey", color = "black", binwidth = 0.005) +
  geom_histogram(
    data = filter(df_pcos, map_lgl(pco_sum, ~ isTRUE(all.equal(.x, 1)))),
    fill = "steelblue", color = "black", binwidth = 0.005
  ) +
  labs(x = "Sum of Partition Coefficients", y = "Genes") +
  scale_y_continuous(expand = c(0, 0, 0.1, 0)) +
  theme_light()
Seems to be correct!
Normalized coefficients are derived by dividing by the medians. Let’s verify.
# Recompute the normalized coefficients as (raw coefficient / column median)
# and keep them next to the reported npco_* columns for comparison.
df_pcons <- df_tiger %>%
  select(
    gene_name, refseq_id, category,
    pco_cy, pco_er, pco_tg,
    npco_cy, npco_er, npco_tg
  ) %>%
  mutate(
    pcon_cy = pco_cy / median(pco_cy),
    pcon_er = pco_er / median(pco_er),
    pcon_tg = pco_tg / median(pco_tg)
  )

# Reported vs. recomputed values, one partition at a time.
with(df_pcons, all.equal(npco_cy, pcon_cy))
## [1] "Mean relative difference: 1.519713e-08"
with(df_pcons, all.equal(npco_er, pcon_er))
## [1] "Mean relative difference: 0.0001565953"
with(df_pcons, all.equal(npco_tg, pcon_tg))
## [1] "Mean relative difference: 0.000138522"
Looks like there are some differences. Let’s check that these really are uniformly small, and not a few large differences.
# Five-number summaries (plus mean) of reported minus recomputed values.
with(df_pcons, summary(npco_cy - pcon_cy))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -3.494e-08 -1.831e-08 -1.520e-08 -1.523e-08 -1.209e-08 -1.636e-11
with(df_pcons, summary(npco_er - pcon_er))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 4.878e-07 1.371e-04 1.566e-04 1.602e-04 1.787e-04 3.596e-04
with(df_pcons, summary(npco_tg - pcon_tg))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3.687e-05 1.190e-04 1.385e-04 1.440e-04 1.630e-04 4.134e-04
# Overlaid histograms of (reported - recomputed), one layer per partition,
# each filled with that partition's palette color.
ggplot(df_pcons) +
  geom_histogram(
    aes(x = npco_cy - pcon_cy),
    binwidth = 1e-5, position = "stack",
    fill = LOC_COLORS["CY"], color = "black"
  ) +
  geom_histogram(
    aes(x = npco_er - pcon_er),
    binwidth = 1e-5, position = "stack",
    fill = LOC_COLORS["ER"], color = "black"
  ) +
  geom_histogram(
    aes(x = npco_tg - pcon_tg),
    binwidth = 1e-5, position = "stack",
    fill = LOC_COLORS["TG"], color = "black"
  ) +
  scale_y_continuous(expand = c(0, 0, 0.05, 0)) +
  labs(x = "Difference (Reported - Computed)", y = "Genes") +
  theme_light()
They are small, but they are biased. Were the medians rounded prior to dividing?
# Medians of the raw coefficients, i.e. the divisors used above.
with(df_pcons, median(pco_cy))
## [1] 0.3426
with(df_pcons, median(pco_er))
## [1] 0.3021473
with(df_pcons, median(pco_tg))
## [1] 0.3346464
Yes. This is consistent with the medians having been rounded to four decimal places (which, for these values, is four significant figures) prior to computing the normalized values. So, we should get exact matching with…
# Same recomputation as before, but dividing by the median rounded to four
# decimal places.
df_pcons2 <- df_tiger %>%
  select(
    gene_name, refseq_id, category,
    pco_cy, pco_er, pco_tg,
    npco_cy, npco_er, npco_tg
  ) %>%
  mutate(
    pcon_cy = pco_cy / round(median(pco_cy), digits = 4),
    pcon_er = pco_er / round(median(pco_er), digits = 4),
    pcon_tg = pco_tg / round(median(pco_tg), digits = 4)
  )

# Reported vs. recomputed values, one partition at a time.
with(df_pcons2, all.equal(npco_cy, pcon_cy))
## [1] TRUE
with(df_pcons2, all.equal(npco_er, pcon_er))
## [1] TRUE
with(df_pcons2, all.equal(npco_tg, pcon_tg))
## [1] TRUE
Perfect!
We were told that categorization was based on the normalized partition coefficients. We’ll see how it matches up on the raw coefficients first, and then check that normalized coefficients properly assign the categories.
# Cross-tabulate the assigned category against the partition with the largest
# RAW coefficient for each (gene_name, refseq_id) pair.
df_pcos %>%
  # Drop the checksum column so only pco_tg / pco_er / pco_cy get pivoted.
  select(-pco_sum) %>%
  pivot_longer(
    cols = starts_with("pco", ignore.case = FALSE),
    names_to = "partition",
    names_prefix = "pco_",
    values_to = "coefficient"
  ) %>%
  # Keep the row(s) holding the maximum coefficient within each group.
  group_by(gene_name, refseq_id) %>%
  slice_max(coefficient) %>%
  ungroup() %>%
  mutate(
    partition = factor(toupper(partition), levels = c("ER", "TG", "CY"))
  ) %>%
  tbl_summary(
    by = category,
    include = partition,
    label = c(category = "Category", partition = "Max Partition (raw)")
  ) %>%
  modify_header(label ~ "**Categorization**")
| Categorization | DF, N = 3,369<sup>1</sup> | ER, N = 919<sup>1</sup> | TG, N = 1,246<sup>1</sup> | CY, N = 1,481<sup>1</sup> |
|---|---|---|---|---|
| Max Partition (raw) | ||||
| ER | 435 (13%) | 864 (94%) | 0 (0%) | 0 (0%) |
| TG | 1,297 (38%) | 51 (5.5%) | 1,246 (100%) | 0 (0%) |
| CY | 1,637 (49%) | 4 (0.4%) | 0 (0%) | 1,481 (100%) |
| <sup>1</sup> n (%) | ||||
Okay, that’s a little noisy, but we know that the actual categories were assigned using the normalized coefficients. So, let’s look at them…
# Cross-tabulate the assigned category against the partition with the largest
# NORMALIZED coefficient (npco_*) for each (gene_name, refseq_id) pair.
df_tiger %>%
  pivot_longer(
    cols = starts_with("npco_"),
    names_to = "partition",
    names_prefix = "npco_",
    values_to = "coefficient"
  ) %>%
  # Keep the row(s) holding the maximum coefficient within each group.
  group_by(gene_name, refseq_id) %>%
  slice_max(coefficient) %>%
  ungroup() %>%
  mutate(
    partition = factor(toupper(partition), levels = c("ER", "TG", "CY"))
  ) %>%
  tbl_summary(
    by = category,
    include = partition,
    label = c(category = "Category", partition = "Max Partition (norm.)")
  ) %>%
  modify_header(label ~ "**Categorization**")
| Categorization | DF, N = 3,369<sup>1</sup> | ER, N = 919<sup>1</sup> | TG, N = 1,246<sup>1</sup> | CY, N = 1,481<sup>1</sup> |
|---|---|---|---|---|
| Max Partition (norm.) | ||||
| ER | 1,104 (33%) | 919 (100%) | 0 (0%) | 0 (0%) |
| TG | 1,038 (31%) | 0 (0%) | 1,246 (100%) | 0 (0%) |
| CY | 1,227 (36%) | 0 (0%) | 0 (0%) | 1,481 (100%) |
| <sup>1</sup> n (%) | ||||
This shows consistency with maximum category.
Let’s look at the categories visualized in ternary plots.
The first plot shows a naïve 50% cutoff line. That is, any genes outside the central triangle have more than 50% of their FPKMs coming from one subcellular compartment. The second plot includes cutoff lines that are chosen to separate the classifications.
# Raw partition coefficients on a ternary diagram, points colored by category.
# Everything except the dashed cutoff lines is shared by the two figures below.
raw_ternary_base <- ggtern(
  df_tiger,
  aes(x = pco_er, y = pco_cy, z = pco_tg, color = category)
) +
  geom_point(size = 0.2, alpha = 0.4) +
  scale_color_manual(values = LOC_COLORS_DARK) +
  labs(x = "ER", y = "CY", z = "TG") +
  guides(color = "none") +
  theme_bw()

# Figure 1: uniform 50% cutoff on each axis.
raw_ternary_base +
  geom_Lline(Lintercept = 0.50, linetype = "dashed") +
  geom_Rline(Rintercept = 0.50, linetype = "dashed") +
  geom_Tline(Tintercept = 0.50, linetype = "dashed")

# Figure 2: per-axis cutoffs chosen to separate the categories.
raw_ternary_base +
  geom_Lline(Lintercept = 0.38, linetype = "dashed") +
  geom_Rline(Rintercept = 0.42, linetype = "dashed") +
  geom_Tline(Tintercept = 0.43, linetype = "dashed")

# No `plot` argument is given, so this writes the most recent plot (figure 2).
ggsave("img/pcos_raw_ternary.pdf", width = 5, height = 5, dpi = 300)
There is some non-linearity in there, but one may not notice it if not looking closely. This is expected because we assign categories using the normalized coefficients.
Three plots show 50% cutoffs, 40% cutoffs, and empirical (best fit) cutoffs.
# Normalized partition coefficients on a ternary diagram, points colored by
# category. Only the dashed cutoff lines differ between the three figures.
norm_ternary_base <- ggtern(
  df_tiger,
  aes(x = npco_er, y = npco_cy, z = npco_tg, color = category)
) +
  geom_point(size = 0.2, alpha = 0.4) +
  scale_color_manual(values = LOC_COLORS_DARK) +
  labs(x = "ER", y = "CY", z = "TG") +
  guides(color = "none") +
  theme_bw()

# Figure 1: 50% cutoffs.
norm_ternary_base +
  geom_Lline(Lintercept = 0.5, linetype = "dashed") +
  geom_Rline(Rintercept = 0.5, linetype = "dashed") +
  geom_Tline(Tintercept = 0.5, linetype = "dashed")

# Figure 2: 40% cutoffs.
norm_ternary_base +
  geom_Lline(Lintercept = 0.4, linetype = "dashed") +
  geom_Rline(Rintercept = 0.4, linetype = "dashed") +
  geom_Tline(Tintercept = 0.4, linetype = "dashed")

# Figure 3: empirically chosen cutoffs.
norm_ternary_base +
  geom_Lline(Lintercept = 0.405, linetype = "dashed") +
  geom_Rline(Rintercept = 0.40, linetype = "dashed") +
  geom_Tline(Tintercept = 0.41, linetype = "dashed")

# No `plot` argument is given, so this writes the most recent plot (figure 3).
ggsave("img/pcos_norm_ternary.pdf", width = 5, height = 5, dpi = 300)
# Alternative rendering 1 of the empirical-cutoff figure: larger points
# (size 0.4), same transparency.
ggtern(
  df_tiger,
  aes(x = npco_er, y = npco_cy, z = npco_tg, color = category)
) +
  geom_point(size = 0.4, alpha = 0.4) +
  geom_Lline(Lintercept = 0.405, linetype = "dashed") +
  geom_Rline(Rintercept = 0.40, linetype = "dashed") +
  geom_Tline(Tintercept = 0.41, linetype = "dashed") +
  scale_color_manual(values = LOC_COLORS_DARK) +
  labs(x = "ER", y = "CY", z = "TG") +
  guides(color = "none") +
  theme_bw()

ggsave("img/pcos_norm_ternary_alt1.pdf", width = 5, height = 5, dpi = 300)

# Alternative rendering 2: still larger (size 0.8) and more opaque points.
ggtern(
  df_tiger,
  aes(x = npco_er, y = npco_cy, z = npco_tg, color = category)
) +
  geom_point(size = 0.8, alpha = 0.6) +
  geom_Lline(Lintercept = 0.405, linetype = "dashed") +
  geom_Rline(Rintercept = 0.40, linetype = "dashed") +
  geom_Tline(Tintercept = 0.41, linetype = "dashed") +
  scale_color_manual(values = LOC_COLORS_DARK) +
  labs(x = "ER", y = "CY", z = "TG") +
  guides(color = "none") +
  theme_bw()

ggsave("img/pcos_norm_ternary_alt2.pdf", width = 5, height = 5, dpi = 300)
Here we plot some of the covariates (e.g., CLIP peak counts) using the ternary plots. We scale both the point sizes and the colors based on the covariate values. This helps to deemphasize all those genes that have low values in favor of those with high values. Overall, I think the individual plots are not so clear to interpret; however, comparing the plots of different covariates seems to make for a nice contrast.
The ridge plots are more for modelling purposes, to explore what type of transformation might be best to apply before inputting the covariate into the regression model.
# TIS11B_CLIP on the normalized ternary diagram; both point color and point
# area encode the covariate. The previously computed `pscore_tis11b` column
# was never referenced by the plot and has been removed.
df_tiger %>%
  # Ascending sort so rows with larger values are drawn later (on top).
  arrange(TIS11B_CLIP) %>%
  ggtern(aes(
    x = npco_er, y = npco_cy, z = npco_tg,
    color = TIS11B_CLIP, size = TIS11B_CLIP
  )) +
  geom_point(alpha = 0.3, pch = 16) +
  geom_Lline(Lintercept = 0.4, linetype = "dashed") +
  geom_Rline(Rintercept = 0.4, linetype = "dashed") +
  geom_Tline(Tintercept = 0.4, linetype = "dashed") +
  scale_color_viridis_c(option = "B", trans = "sqrt") +
  scale_size_area() +
  labs(x = "ER", y = "CY", z = "TG") +
  guides(size = "none") +
  theme_light()

ggsave("img/tis11b_ternary.pdf", width = 8, height = 6, dpi = 300)
This is clearly non-cytoplasmic, with leaning toward the TG side.
# Covariate column whose per-category distribution is shown below.
RBP_CLIP <- "TIS11B_CLIP"

# 1) Untransformed values.
df_tiger %>%
  ggplot(aes(x = .data[[RBP_CLIP]], y = category, fill = category)) +
  geom_density_ridges(alpha = 0.95) +
  scale_fill_manual(values = LOC_COLORS) +
  labs(x = "Raw Counts") +
  theme_bw()

# 2) Square-root transform, centered on the square root of the mean:
#    2 * (sqrt(x) - sqrt(mean(x))).
df_tiger %>%
  mutate(
    pscore = 2 * (sqrt(.data[[RBP_CLIP]]) -
      sqrt(mean(.data[[RBP_CLIP]], na.rm = TRUE)))
  ) %>%
  ggplot(aes(x = pscore, y = category, fill = category)) +
  geom_density_ridges(alpha = 0.95) +
  scale_fill_manual(values = LOC_COLORS) +
  labs(x = "Variance Stabilizing Transform") +
  theme_bw()

# 3) Z-score. scale() returns a one-column matrix carrying centering/scaling
#    attributes; as.numeric() flattens it so `zscore` is an ordinary numeric
#    column rather than a matrix column.
df_tiger %>%
  mutate(zscore = as.numeric(scale(.data[[RBP_CLIP]]))) %>%
  ggplot(aes(x = zscore, y = category, fill = category)) +
  geom_density_ridges(alpha = 0.95) +
  scale_fill_manual(values = LOC_COLORS) +
  labs(x = "Z-Score") +
  theme_bw()
# TIA1_L1_CLIP on the normalized ternary diagram; both point color and point
# area encode the covariate. A copy-pasted `pscore_tis11b` column (computed
# from TIA1_L1_CLIP but never referenced by the plot) has been removed.
df_tiger %>%
  # Ascending sort so rows with larger values are drawn later (on top).
  arrange(TIA1_L1_CLIP) %>%
  ggtern(aes(
    x = npco_er, y = npco_cy, z = npco_tg,
    color = TIA1_L1_CLIP, size = TIA1_L1_CLIP
  )) +
  geom_point(alpha = 0.3, pch = 16) +
  geom_Lline(Lintercept = 0.4, linetype = "dashed") +
  geom_Rline(Rintercept = 0.4, linetype = "dashed") +
  geom_Tline(Tintercept = 0.4, linetype = "dashed") +
  scale_color_viridis_c(option = "B", trans = "sqrt") +
  scale_size_area() +
  labs(x = "ER", y = "CY", z = "TG") +
  guides(size = "none") +
  theme_light()

ggsave("img/tial1_ternary.pdf", width = 8, height = 6, dpi = 300)
# Covariate column whose per-category distribution is shown below.
RBP_CLIP <- "TIA1_L1_CLIP"

# 1) Untransformed values.
df_tiger %>%
  ggplot(aes(x = .data[[RBP_CLIP]], y = category, fill = category)) +
  geom_density_ridges(alpha = 0.95) +
  scale_fill_manual(values = LOC_COLORS) +
  labs(x = "Raw Counts") +
  theme_bw()

# 2) Square-root transform, centered on the square root of the mean:
#    2 * (sqrt(x) - sqrt(mean(x))).
df_tiger %>%
  mutate(
    pscore = 2 * (sqrt(.data[[RBP_CLIP]]) -
      sqrt(mean(.data[[RBP_CLIP]], na.rm = TRUE)))
  ) %>%
  ggplot(aes(x = pscore, y = category, fill = category)) +
  geom_density_ridges(alpha = 0.95) +
  scale_fill_manual(values = LOC_COLORS) +
  labs(x = "Variance Stabilizing Transform") +
  theme_bw()

# 3) Z-score. scale() returns a one-column matrix carrying centering/scaling
#    attributes; as.numeric() flattens it so `zscore` is an ordinary numeric
#    column rather than a matrix column.
df_tiger %>%
  mutate(zscore = as.numeric(scale(.data[[RBP_CLIP]]))) %>%
  ggplot(aes(x = zscore, y = category, fill = category)) +
  geom_density_ridges(alpha = 0.95) +
  scale_fill_manual(values = LOC_COLORS) +
  labs(x = "Z-Score") +
  theme_bw()
# PUM2_CLIP on the normalized ternary diagram; both point color and point area
# encode the covariate. A copy-pasted `pscore_tis11b` column (computed from
# PUM2_CLIP but never referenced by the plot) has been removed.
df_tiger %>%
  # Ascending sort so rows with larger values are drawn later (on top).
  arrange(PUM2_CLIP) %>%
  ggtern(aes(
    x = npco_er, y = npco_cy, z = npco_tg,
    color = PUM2_CLIP, size = PUM2_CLIP
  )) +
  geom_point(alpha = 0.3, pch = 16) +
  geom_Lline(Lintercept = 0.4, linetype = "dashed") +
  geom_Rline(Rintercept = 0.4, linetype = "dashed") +
  geom_Tline(Tintercept = 0.4, linetype = "dashed") +
  scale_color_viridis_c(option = "B", trans = "sqrt") +
  scale_size_area() +
  labs(x = "ER", y = "CY", z = "TG") +
  guides(size = "none") +
  theme_light()

ggsave("img/pum2_clip_ternary.pdf", width = 8, height = 6, dpi = 300)
# Covariate column whose per-category distribution is shown below.
RBP_CLIP <- "PUM2_CLIP"

# 1) Untransformed values.
df_tiger %>%
  ggplot(aes(x = .data[[RBP_CLIP]], y = category, fill = category)) +
  geom_density_ridges(alpha = 0.95) +
  scale_fill_manual(values = LOC_COLORS) +
  labs(x = "Raw Counts") +
  theme_bw()

# 2) Square-root transform, centered on the square root of the mean:
#    2 * (sqrt(x) - sqrt(mean(x))).
df_tiger %>%
  mutate(
    pscore = 2 * (sqrt(.data[[RBP_CLIP]]) -
      sqrt(mean(.data[[RBP_CLIP]], na.rm = TRUE)))
  ) %>%
  ggplot(aes(x = pscore, y = category, fill = category)) +
  geom_density_ridges(alpha = 0.95) +
  scale_fill_manual(values = LOC_COLORS) +
  labs(x = "Variance Stabilizing Transform") +
  theme_bw()

# 3) Z-score. scale() returns a one-column matrix carrying centering/scaling
#    attributes; as.numeric() flattens it so `zscore` is an ordinary numeric
#    column rather than a matrix column.
df_tiger %>%
  mutate(zscore = as.numeric(scale(.data[[RBP_CLIP]]))) %>%
  ggplot(aes(x = zscore, y = category, fill = category)) +
  geom_density_ridges(alpha = 0.95) +
  scale_fill_manual(values = LOC_COLORS) +
  labs(x = "Z-Score") +
  theme_bw()
# Pumilio_3UTR_num on the normalized ternary diagram; both point color and
# point area encode the covariate. The previously computed `pscore` column was
# never referenced by the plot and has been removed.
df_tiger %>%
  # Ascending sort so rows with larger values are drawn later (on top).
  arrange(Pumilio_3UTR_num) %>%
  ggtern(aes(
    x = npco_er, y = npco_cy, z = npco_tg,
    color = Pumilio_3UTR_num,
    size = Pumilio_3UTR_num
  )) +
  geom_point(alpha = 0.3, pch = 16) +
  geom_Lline(Lintercept = 0.4, linetype = "dashed") +
  geom_Rline(Rintercept = 0.4, linetype = "dashed") +
  geom_Tline(Tintercept = 0.4, linetype = "dashed") +
  scale_color_viridis_c(option = "B", trans = "sqrt") +
  scale_size_area() +
  labs(x = "ER", y = "CY", z = "TG") +
  guides(size = "none") +
  theme_light()

ggsave("img/pumilio_3utr_ternary.pdf", width = 8, height = 6, dpi = 300)
# Pumilio_3UTR_num per 1000 units of Anno_3UTR_length on the normalized
# ternary diagram; both point color and point area encode the density.
# NOTE(review): presumably sites per kb of annotated 3'UTR - confirm units.
df_tiger %>%
  mutate(pum_density = 1000 * Pumilio_3UTR_num / Anno_3UTR_length) %>%
  # Ascending sort so rows with larger densities are drawn later (on top).
  arrange(pum_density) %>%
  ggtern(aes(
    x = npco_er, y = npco_cy, z = npco_tg,
    color = pum_density,
    size = pum_density
  )) +
  geom_point(alpha = 0.3, pch = 16) +
  geom_Lline(Lintercept = 0.4, linetype = "dashed") +
  geom_Rline(Rintercept = 0.4, linetype = "dashed") +
  geom_Tline(Tintercept = 0.4, linetype = "dashed") +
  scale_color_viridis_c(option = "B", trans = "sqrt") +
  scale_size_area() +
  labs(x = "ER", y = "CY", z = "TG") +
  guides(size = "none") +
  theme_light()

ggsave("img/pumilio_density_ternary.pdf", width = 8, height = 6, dpi = 300)
# Covariate column whose per-category distribution is shown below.
RBP_CLIP <- "Pumilio_3UTR_num"

# 1) Untransformed values.
df_tiger %>%
  ggplot(aes(x = .data[[RBP_CLIP]], y = category, fill = category)) +
  geom_density_ridges(alpha = 0.95) +
  scale_fill_manual(values = LOC_COLORS) +
  labs(x = "Raw Counts") +
  theme_bw()

# 2) Square-root transform, centered on the square root of the mean:
#    2 * (sqrt(x) - sqrt(mean(x))).
df_tiger %>%
  mutate(
    pscore = 2 * (sqrt(.data[[RBP_CLIP]]) -
      sqrt(mean(.data[[RBP_CLIP]], na.rm = TRUE)))
  ) %>%
  ggplot(aes(x = pscore, y = category, fill = category)) +
  geom_density_ridges(alpha = 0.95) +
  scale_fill_manual(values = LOC_COLORS) +
  labs(x = "Variance Stabilizing Transform") +
  theme_bw()

# 3) Z-score. scale() returns a one-column matrix carrying centering/scaling
#    attributes; as.numeric() flattens it so `zscore` is an ordinary numeric
#    column rather than a matrix column.
df_tiger %>%
  mutate(zscore = as.numeric(scale(.data[[RBP_CLIP]]))) %>%
  ggplot(aes(x = zscore, y = category, fill = category)) +
  geom_density_ridges(alpha = 0.95) +
  scale_fill_manual(values = LOC_COLORS) +
  labs(x = "Z-Score") +
  theme_bw()
# Two covariates on one normalized ternary diagram, on a black panel:
#   - solid points (pch 16): color gradient black -> red by TIS11B_CLIP
#   - filled circles (pch 21): fill gradient black -> green by TIA1_L1_CLIP
# Point area in both layers follows the sum of the two covariates.
# The previously computed `pscore_tis11b` / `pscore_tial1` columns were never
# referenced by the plot and have been removed.
df_tiger %>%
  # Ascending sort on the larger of the two values so strong signals are
  # drawn later (on top).
  arrange(pmax(TIS11B_CLIP, TIA1_L1_CLIP)) %>%
  ggtern(aes(x = npco_er, y = npco_cy, z = npco_tg)) +
  geom_point(
    aes(color = TIS11B_CLIP, size = TIS11B_CLIP + TIA1_L1_CLIP),
    alpha = 0.60, pch = 16
  ) +
  scale_color_gradient(low = "#000000", high = "#FF0000", trans = "log1p") +
  geom_point(
    aes(fill = TIA1_L1_CLIP, size = TIS11B_CLIP + TIA1_L1_CLIP),
    alpha = 0.12, pch = 21
  ) +
  scale_fill_gradient(low = "#000000", high = "#00FF00", trans = "log1p") +
  geom_Lline(Lintercept = 0.4, linetype = "dashed") +
  geom_Rline(Rintercept = 0.4, linetype = "dashed") +
  geom_Tline(Tintercept = 0.4, linetype = "dashed") +
  scale_size_area() +
  labs(x = "ER", y = "CY", z = "TG") +
  guides(size = "none") +
  theme_dark() +
  theme(panel.background = element_rect(fill = "#000000"))

ggsave("img/tis11b_tial1_ternary.pdf", width = 10, height = 8, dpi = 300)
# Two covariates on one normalized ternary diagram, on a black panel:
#   - solid points (pch 16): color gradient black -> red by TIS11B_CLIP
#   - filled circles (pch 21): fill gradient black -> green by Muk_HuR_CLIP
# Point area in both layers follows the sum of the two covariates.
# The previously computed `pscore_tis11b` / `pscore_hur` columns were never
# referenced by the plot and have been removed.
df_tiger %>%
  # Ascending sort on the larger of the two values so strong signals are
  # drawn later (on top).
  arrange(pmax(TIS11B_CLIP, Muk_HuR_CLIP)) %>%
  ggtern(aes(x = npco_er, y = npco_cy, z = npco_tg)) +
  geom_point(
    aes(color = TIS11B_CLIP, size = TIS11B_CLIP + Muk_HuR_CLIP),
    alpha = 0.60, pch = 16
  ) +
  scale_color_gradient(low = "#000000", high = "#FF0000", trans = "log1p") +
  geom_point(
    aes(fill = Muk_HuR_CLIP, size = TIS11B_CLIP + Muk_HuR_CLIP),
    alpha = 0.15, pch = 21
  ) +
  scale_fill_gradient(low = "#000000", high = "#00FF00", trans = "log1p") +
  geom_Lline(Lintercept = 0.4, linetype = "dashed") +
  geom_Rline(Rintercept = 0.4, linetype = "dashed") +
  geom_Tline(Tintercept = 0.4, linetype = "dashed") +
  scale_size_area() +
  labs(x = "ER", y = "CY", z = "TG") +
  guides(size = "none") +
  theme_dark() +
  theme(panel.background = element_rect(fill = "#000000"))

ggsave("img/tis11b_hur_ternary.pdf", width = 10, height = 8, dpi = 300)
# Two covariates on one normalized ternary diagram, on a black panel:
#   - solid points (pch 16): color gradient black -> red by TIS11B_CLIP
#   - filled circles (pch 21): fill gradient black -> green by PUM_RIP12
# Point area in both layers follows the sum of the two covariates.
# The previously computed `pscore_tis11b` / `pscore_pum12` columns were never
# referenced by the plot and have been removed.
df_tiger %>%
  # Ascending sort on the larger of the two values so strong signals are
  # drawn later (on top).
  arrange(pmax(TIS11B_CLIP, PUM_RIP12)) %>%
  ggtern(aes(x = npco_er, y = npco_cy, z = npco_tg)) +
  geom_point(
    aes(color = TIS11B_CLIP, size = TIS11B_CLIP + PUM_RIP12),
    alpha = 0.60, pch = 16
  ) +
  scale_color_gradient(low = "#000000", high = "#FF0000", trans = "log1p") +
  geom_point(
    aes(fill = PUM_RIP12, size = TIS11B_CLIP + PUM_RIP12),
    alpha = 0.15, pch = 21
  ) +
  # NOTE(review): identity scale here, whereas the sibling plots use log1p for
  # the fill covariate - presumably deliberate for PUM_RIP12; confirm.
  scale_fill_gradient(low = "#000000", high = "#00FF00", trans = "identity") +
  geom_Lline(Lintercept = 0.4, linetype = "dashed") +
  geom_Rline(Rintercept = 0.4, linetype = "dashed") +
  geom_Tline(Tintercept = 0.4, linetype = "dashed") +
  scale_size_area() +
  labs(x = "ER", y = "CY", z = "TG") +
  guides(size = "none") +
  theme_dark() +
  theme(panel.background = element_rect(fill = "#000000"))

ggsave("img/tis11b_pum12_ternary.pdf", width = 10, height = 8, dpi = 300)
Data looks very promising to predict partition coefficients from covariate values. Ridge peaks indicate the variance stabilizing transform is very effective. We may want to combine the VST with a z-score to create model coefficients that are directly comparable.
## R version 4.1.3 (2022-03-10)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: macOS Big Sur/Monterey 10.16
##
## Matrix products: default
## BLAS/LAPACK: /Users/mfansler/miniconda3/envs/brms_r41/lib/libopenblasp-r0.3.20.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] cowplot_1.1.1 readxl_1.4.0 gtsummary_1.6.1 ggridges_0.5.3
## [5] ggtern_3.3.5 forcats_0.5.1 stringr_1.4.0 dplyr_1.0.9
## [9] purrr_0.3.4 readr_2.1.2 tidyr_1.2.0 tibble_3.1.7
## [13] ggplot2_3.3.6 tidyverse_1.3.1 magrittr_2.0.3
##
## loaded via a namespace (and not attached):
## [1] httr_1.4.3 sass_0.4.1 viridisLite_0.4.0
## [4] jsonlite_1.8.0 modelr_0.1.8 bslib_0.3.1
## [7] assertthat_0.2.1 highr_0.9 tensorA_0.36.2
## [10] cellranger_1.1.0 yaml_2.3.5 robustbase_0.95-0
## [13] pillar_1.7.0 backports_1.4.1 lattice_0.20-45
## [16] glue_1.6.2 digest_0.6.29 checkmate_2.1.0
## [19] rvest_1.0.2 colorspace_2.0-3 htmltools_0.5.2
## [22] plyr_1.8.7 pkgconfig_2.0.3 broom_0.8.0
## [25] haven_2.5.0 scales_1.2.0 tzdb_0.3.0
## [28] farver_2.1.0 generics_0.1.2 ellipsis_0.3.2
## [31] withr_2.5.0 cli_3.3.0 proto_1.0.0
## [34] crayon_1.5.1 evaluate_0.15 fs_1.5.2
## [37] fansi_1.0.3 broom.helpers_1.7.0 MASS_7.3-57
## [40] xml2_1.3.3 compositions_2.0-4 tools_4.1.3
## [43] hms_1.1.1 lifecycle_1.0.1 munsell_0.5.0
## [46] reprex_2.0.1 compiler_4.1.3 jquerylib_0.1.4
## [49] rlang_1.0.2 grid_4.1.3 gt_0.6.0
## [52] rstudioapi_0.13 labeling_0.4.2 rmarkdown_2.14
## [55] gtable_0.3.0 DBI_1.1.3 R6_2.5.1
## [58] bayesm_3.1-4 gridExtra_2.3 lubridate_1.8.0
## [61] knitr_1.39 fastmap_1.1.0 utf8_1.2.2
## [64] commonmark_1.8.0 latex2exp_0.9.4 stringi_1.7.6
## [67] Rcpp_1.0.8.3 vctrs_0.4.1 DEoptimR_1.0-11
## [70] dbplyr_2.2.0 tidyselect_1.1.2 xfun_0.31
## Conda Environment YAML
name: brms_r41
channels:
- merv
- conda-forge
- bioconda
- defaults
dependencies:
- _r-mutex=1.0.1=anacondar_1
- bwidget=1.9.14=h694c41f_1
- bzip2=1.0.8=h0d85af4_4
- c-ares=1.18.1=h0d85af4_0
- ca-certificates=2022.6.15=h033912b_0
- cairo=1.16.0=h9e0e54b_1010
- cctools_osx-64=973.0.1=h3eff9a4_10
- clang=14.0.4=h694c41f_0
- clang-14=14.0.4=default_h55ffa42_0
- clang_osx-64=14.0.4=h3a95cd4_2
- clangxx=14.0.4=default_h55ffa42_0
- clangxx_osx-64=14.0.4=he1dbc44_2
- compiler-rt=14.0.4=h7fcd477_0
- compiler-rt_osx-64=14.0.4=h6df654d_0
- curl=7.83.1=h23f1065_0
- expat=2.4.8=h96cf925_0
- font-ttf-dejavu-sans-mono=2.37=hab24e00_0
- font-ttf-inconsolata=3.000=h77eed37_0
- font-ttf-source-code-pro=2.038=h77eed37_0
- font-ttf-ubuntu=0.83=hab24e00_0
- fontconfig=2.14.0=h676cef8_0
- fonts-conda-ecosystem=1=0
- fonts-conda-forge=1=0
- freetype=2.10.4=h4cff582_1
- fribidi=1.0.10=hbcb3906_0
- gettext=0.19.8.1=hd1a6beb_1008
- gfortran_impl_osx-64=9.3.0=h9cc0e5e_23
- gfortran_osx-64=9.3.0=h18f7dce_15
- gmp=6.2.1=h2e338ed_0
- graphite2=1.3.13=h2e338ed_1001
- gsl=2.7=h93259b0_0
- harfbuzz=4.2.0=h48644e2_0
- icu=69.1=he49afe7_0
- isl=0.22.1=hb1e8313_2
- jpeg=9e=h5eb16cf_1
- krb5=1.19.3=hb98e516_0
- ld64_osx-64=609=h1e06c2b_10
- lerc=3.0=he49afe7_0
- libblas=3.9.0=15_osx64_openblas
- libcblas=3.9.0=15_osx64_openblas
- libclang-cpp14=14.0.4=default_h55ffa42_0
- libcurl=7.83.1=h23f1065_0
- libcxx=14.0.5=hce7ea42_1
- libdeflate=1.12=hac89ed1_0
- libedit=3.1.20191231=h0678c8f_2
- libev=4.33=haf1e3a3_1
- libffi=3.4.2=h0d85af4_5
- libgfortran=5.0.0=9_3_0_h6c81a4c_23
- libgfortran-devel_osx-64=9.3.0=h6c81a4c_23
- libgfortran5=9.3.0=h6c81a4c_23
- libglib=2.70.2=hf1fb8c0_4
- libiconv=1.16=haf1e3a3_0
- liblapack=3.9.0=15_osx64_openblas
- libllvm13=13.0.1=h64f94b2_2
- libllvm14=14.0.4=h41df66c_0
- libnghttp2=1.47.0=hca56917_0
- libopenblas=0.3.20=openmp_hb3cd9ec_0
- libpng=1.6.37=h7cec526_2
- libssh2=1.10.0=hd3787cc_2
- libtiff=4.4.0=h9847915_1
- libv8=8.9.83=h5fe4d7b_1
- libwebp-base=1.2.2=h0d85af4_1
- libxml2=2.9.12=h7e28ab6_1
- libzlib=1.2.12=hfe4f2af_1
- llvm-openmp=14.0.4=ha654fa7_0
- llvm-tools=14.0.4=h41df66c_0
- lz4-c=1.9.3=he49afe7_1
- make=4.3=h22f3db7_1
- mpc=1.2.1=hbb51d92_0
- mpfr=4.1.0=h0f52abe_1
- ncurses=6.3=h96cf925_1
- nlopt=2.7.1=py310hdbd82a6_1
- numpy=1.22.4=py310hed37afb_0
- openssl=3.0.4=hfe4f2af_2
- pandoc=2.18=h694c41f_0
- pango=1.50.7=hc4a7b6d_0
- pcre=8.45=he49afe7_0
- pcre2=10.37=ha16e1b2_0
- pip=22.1.2=pyhd8ed1ab_0
- pixman=0.40.0=hbcb3906_0
- python=3.10.5=hdd68b96_0_cpython
- python_abi=3.10=2_cp310
- r-abind=1.4_5=r41hc72bb7e_1003
- r-askpass=1.1=r41h28b5c78_2
- r-assertthat=0.2.1=r41hc72bb7e_2
- r-backports=1.4.1=r41h28b5c78_0
- r-base=4.1.3=h56d3809_0
- r-base64enc=0.1_3=r41h28b5c78_1004
- r-bayesm=3.1_4=r41he5a6823_2
- r-bayesplot=1.9.0=r41hc72bb7e_0
- r-bh=1.78.0_0=r41hc72bb7e_0
- r-bit=4.0.4=r41h28b5c78_0
- r-bit64=4.0.5=r41h28b5c78_0
- r-bitops=1.0_7=r41h28b5c78_0
- r-blob=1.2.3=r41hc72bb7e_0
- r-boot=1.3_28=r41hc72bb7e_0
- r-bridgesampling=1.1_2=r41hc72bb7e_0
- r-brio=1.1.3=r41h28b5c78_0
- r-brms=2.17.0=r41hc4bb905_0
- r-brobdingnag=1.2_7=r41hc72bb7e_0
- r-broom=0.8.0=r41hc72bb7e_0
- r-broom.helpers=1.7.0=r41hc72bb7e_0
- r-bslib=0.3.1=r41hc72bb7e_0
- r-cachem=1.0.6=r41h28b5c78_0
- r-callr=3.7.0=r41hc72bb7e_0
- r-caret=6.0_92=r41h0f1d5c4_0
- r-cellranger=1.1.0=r41hc72bb7e_1004
- r-checkmate=2.1.0=r41h0f1d5c4_0
- r-class=7.3_20=r41h28b5c78_0
- r-cli=3.3.0=r41h8619c4b_0
- r-clipr=0.8.0=r41hc72bb7e_0
- r-coda=0.19_4=r41hc72bb7e_0
- r-codetools=0.2_18=r41hc72bb7e_0
- r-colorspace=2.0_3=r41h0f1d5c4_0
- r-colourpicker=1.1.1=r41hc72bb7e_0
- r-commonmark=1.8.0=r41h0f1d5c4_0
- r-compositions=2.0_4=r41h28b5c78_0
- r-cowplot=1.1.1=r41hc72bb7e_0
- r-cpp11=0.4.2=r41hc72bb7e_0
- r-crayon=1.5.1=r41hc72bb7e_0
- r-crosstalk=1.2.0=r41hc72bb7e_0
- r-curl=4.3.2=r41h28b5c78_0
- r-data.table=1.14.2=r41ha76789d_0
- r-dbi=1.1.3=r41hc72bb7e_0
- r-dbplyr=2.2.0=r41hc72bb7e_0
- r-deoptimr=1.0_11=r41hc72bb7e_0
- r-desc=1.4.1=r41hc72bb7e_0
- r-diffobj=0.3.5=r41h28b5c78_0
- r-digest=0.6.29=r41h9951f98_0
- r-dirichletreg=0.7_1=r41h28b5c78_0
- r-distributional=0.3.0=r41hc72bb7e_0
- r-dplyr=1.0.9=r41h8619c4b_0
- r-dt=0.23=r41hc72bb7e_0
- r-dtplyr=1.2.1=r41hc72bb7e_0
- r-dygraphs=1.1.1.6=r41hc72bb7e_1003
- r-e1071=1.7_11=r41h8619c4b_0
- r-ellipsis=0.3.2=r41h28b5c78_0
- r-evaluate=0.15=r41hc72bb7e_0
- r-fansi=1.0.3=r41h0f1d5c4_0
- r-farver=2.1.0=r41h9951f98_0
- r-fastmap=1.1.0=r41h9951f98_0
- r-fontawesome=0.2.2=r41hc72bb7e_0
- r-forcats=0.5.1=r41hc72bb7e_0
- r-foreach=1.5.2=r41hc72bb7e_0
- r-formula=1.2_4=r41hc72bb7e_0
- r-fs=1.5.2=r41hc4bb905_1
- r-future=1.26.1=r41hc72bb7e_0
- r-future.apply=1.9.0=r41hc72bb7e_0
- r-gamm4=0.2_6=r41hc72bb7e_1
- r-gargle=1.2.0=r41hc72bb7e_0
- r-generics=0.1.2=r41hc72bb7e_0
- r-ggplot2=3.3.6=r41hc72bb7e_0
- r-ggrepel=0.9.1=r41h9951f98_0
- r-ggridges=0.5.3=r41hc72bb7e_0
- r-ggtern=3.3.5=r41hc72bb7e_0
- r-globals=0.15.0=r41hc72bb7e_0
- r-glue=1.6.2=r41h0f1d5c4_0
- r-googledrive=2.0.0=r41hc72bb7e_0
- r-googlesheets4=1.0.0=r41h785f33e_0
- r-gower=1.0.0=r41h28b5c78_0
- r-gridextra=2.3=r41hc72bb7e_1003
- r-gt=0.6.0=r41hc72bb7e_0
- r-gtable=0.3.0=r41hc72bb7e_3
- r-gtools=3.9.2.2=r41h67d6963_0
- r-gtsummary=1.6.1=r41hc72bb7e_0
- r-hardhat=1.1.0=r41hc72bb7e_0
- r-haven=2.5.0=r41h8619c4b_0
- r-hexbin=1.28.2=r41h8e0a2a9_0
- r-highr=0.9=r41hc72bb7e_0
- r-hms=1.1.1=r41hc72bb7e_0
- r-htmltools=0.5.2=r41h9951f98_0
- r-htmlwidgets=1.5.4=r41hc72bb7e_0
- r-httpuv=1.6.5=r41h9951f98_0
- r-httr=1.4.3=r41hc72bb7e_0
- r-ids=1.0.1=r41hc72bb7e_1
- r-igraph=1.3.0=r41hd51be07_0
- r-inline=0.3.19=r41hc72bb7e_0
- r-ipred=0.9_13=r41h67d6963_0
- r-isoband=0.2.5=r41h9951f98_0
- r-iterators=1.0.14=r41hc72bb7e_0
- r-jquerylib=0.1.4=r41hc72bb7e_0
- r-jsonlite=1.8.0=r41h0f1d5c4_0
- r-kernsmooth=2.23_20=r41he19034d_0
- r-knitr=1.39=r41hc72bb7e_0
- r-labeling=0.4.2=r41hc72bb7e_1
- r-labelled=2.9.1=r41hc72bb7e_0
- r-later=1.2.0=r41h9951f98_0
- r-latex2exp=0.9.4=r41hc72bb7e_0
- r-lattice=0.20_45=r41h28b5c78_0
- r-lava=1.6.10=r41hc72bb7e_0
- r-lazyeval=0.2.2=r41h28b5c78_2
- r-lifecycle=1.0.1=r41hc72bb7e_0
- r-listenv=0.8.0=r41hc72bb7e_1
- r-lme4=1.1_29=r41hc4bb905_0
- r-loo=2.5.1=r41hc72bb7e_0
- r-lubridate=1.8.0=r41h9951f98_0
- r-magrittr=2.0.3=r41h0f1d5c4_0
- r-markdown=1.1=r41h28b5c78_1
- r-mass=7.3_57=r41h67d6963_0
- r-matrix=1.4_1=r41ha2825d1_0
- r-matrixstats=0.62.0=r41h0f1d5c4_0
- r-maxlik=1.5_2=r41hc72bb7e_0
- r-mervdown=0.1.1=r41_0
- r-mgcv=1.8_40=r41h60b693f_0
- r-mime=0.12=r41h28b5c78_0
- r-miniui=0.1.1.1=r41hc72bb7e_1002
- r-minqa=1.2.4=r41h1c00d0a_1006
- r-misctools=0.6_26=r41hc72bb7e_1
- r-modelmetrics=1.2.2.2=r41hff6cd7b_1
- r-modelr=0.1.8=r41hc72bb7e_0
- r-munsell=0.5.0=r41hc72bb7e_1004
- r-mvtnorm=1.1_3=r41h749f5a1_0
- r-nleqslv=3.3.2=r41h749f5a1_1006
- r-nlme=3.1_158=r41he3b5f32_0
- r-nloptr=2.0.3=r41h4294f1f_0
- r-nnet=7.3_17=r41h28b5c78_0
- r-numderiv=2016.8_1.1=r41hc72bb7e_3
- r-openssl=2.0.2=r41he24a83a_0
- r-packrat=0.8.0=r41hc72bb7e_0
- r-parallelly=1.32.0=r41hc72bb7e_0
- r-pillar=1.7.0=r41hc72bb7e_0
- r-pkgbuild=1.3.1=r41hc72bb7e_0
- r-pkgconfig=2.0.3=r41hc72bb7e_1
- r-pkgload=1.2.4=r41h9951f98_0
- r-plyr=1.8.7=r41hc4bb905_0
- r-posterior=1.2.2=r41hc72bb7e_0
- r-praise=1.0.0=r41hc72bb7e_1005
- r-prettyunits=1.1.1=r41hc72bb7e_1
- r-proc=1.18.0=r41h9951f98_0
- r-processx=3.6.1=r41h67d6963_0
- r-prodlim=2019.11.13=r41h9951f98_1
- r-progress=1.2.2=r41hc72bb7e_2
- r-progressr=0.10.1=r41hc72bb7e_0
- r-projpred=2.1.2=r41h8619c4b_0
- r-promises=1.2.0.1=r41h9951f98_0
- r-proto=1.0.0=r41ha770c72_2003
- r-proxy=0.4_27=r41h67d6963_0
- r-ps=1.7.1=r41h67d6963_0
- r-purrr=0.3.4=r41h28b5c78_1
- r-r6=2.5.1=r41hc72bb7e_0
- r-randomforest=4.7_1.1=r41he3b5f32_0
- r-rappdirs=0.3.3=r41h28b5c78_0
- r-rcolorbrewer=1.1_3=r41h785f33e_0
- r-rcpp=1.0.8.3=r41hc4bb905_0
- r-rcpparmadillo=0.11.2.0.0=r41h3fca91c_0
- r-rcppeigen=0.3.3.9.2=r41hde7ee74_0
- r-rcppparallel=5.1.5=r41h9951f98_0
- r-readr=2.1.2=r41h9951f98_0
- r-readxl=1.4.0=r41h45e8629_0
- r-recipes=0.2.0=r41hc72bb7e_0
- r-rematch=1.0.1=r41hc72bb7e_1004
- r-rematch2=2.1.2=r41hc72bb7e_1
- r-reprex=2.0.1=r41hc72bb7e_0
- r-reshape2=1.4.4=r41h9951f98_1
- r-rlang=1.0.2=r41hc4bb905_0
- r-rmarkdown=2.14=r41hc72bb7e_0
- r-robustbase=0.95_0=r41h6f100c1_0
- r-rpart=4.1.16=r41h28b5c78_0
- r-rprojroot=2.0.3=r41hc72bb7e_0
- r-rsconnect=0.8.26=r41hc72bb7e_0
- r-rstan=2.21.5=r41hc4bb905_0
- r-rstantools=2.2.0=r41hc4bb905_0
- r-rstudioapi=0.13=r41hc72bb7e_0
- r-rvest=1.0.2=r41hc72bb7e_0
- r-sandwich=3.0_2=r41hc72bb7e_0
- r-sass=0.4.1=r41hc4bb905_0
- r-scales=1.2.0=r41hc72bb7e_0
- r-selectr=0.4_2=r41hc72bb7e_1
- r-shiny=1.7.1=r41h785f33e_0
- r-shinyjs=2.1.0=r41hc72bb7e_0
- r-shinystan=2.6.0=r41hc72bb7e_0
- r-shinythemes=1.2.0=r41hc72bb7e_0
- r-sourcetools=0.1.7=r41h9951f98_1002
- r-squarem=2021.1=r41hc72bb7e_0
- r-stanheaders=2.21.0_7=r41h1c00d0a_0
- r-statmod=1.4.36=r41h0661a58_0
- r-stringi=1.7.6=r41h99ba7f4_1
- r-stringr=1.4.0=r41hc72bb7e_2
- r-survival=3.3_1=r41h0f1d5c4_0
- r-sys=3.4=r41h28b5c78_0
- r-tensora=0.36.2=r41h28b5c78_0
- r-testthat=3.1.4=r41h8619c4b_0
- r-threejs=0.3.3=r41hc72bb7e_1
- r-tibble=3.1.7=r41h67d6963_0
- r-tidyr=1.2.0=r41h9951f98_0
- r-tidyselect=1.1.2=r41hbe3e9c8_0
- r-tidyverse=1.3.1=r41hc72bb7e_0
- r-timedate=3043.102=r41hc72bb7e_1002
- r-tinytex=0.40=r41hc72bb7e_0
- r-tzdb=0.3.0=r41hc4bb905_0
- r-utf8=1.2.2=r41h28b5c78_0
- r-uuid=1.1_0=r41h0f1d5c4_0
- r-v8=4.2.0=r41he06a7a2_0
- r-vctrs=0.4.1=r41hc4bb905_0
- r-viridislite=0.4.0=r41hc72bb7e_0
- r-vroom=1.5.7=r41h9951f98_0
- r-waldo=0.4.0=r41hc72bb7e_0
- r-withr=2.5.0=r41hc72bb7e_0
- r-writexl=1.4.0=r41h28b5c78_0
- r-xfun=0.31=r41h8619c4b_0
- r-xml2=1.3.3=r41h9951f98_0
- r-xtable=1.8_4=r41hc72bb7e_3
- r-xts=0.12.1=r41h0f1d5c4_0
- r-yaml=2.3.5=r41h0f1d5c4_0
- r-zoo=1.8_10=r41h0f1d5c4_0
- readline=8.1.2=h3899abd_0
- setuptools=62.6.0=py310h2ec42d9_0
- sigtool=0.1.3=h88f4db0_0
- sqlite=3.38.5=hd9f0692_0
- tapi=1100.0.11=h9ce4665_0
- tk=8.6.12=h5dbffcc_0
- tktable=2.10=h49f0cf7_3
- tzdata=2022a=h191b570_0
- wheel=0.37.1=pyhd8ed1ab_0
- xz=5.2.5=haf1e3a3_1
- zlib=1.2.12=hfe4f2af_1
- zstd=1.5.2=ha9df2e0_1
prefix: /Users/mfansler/miniconda3/envs/brms_r41